# Data prep
#
# Date last modified:   2025-06-25
# Auhtor:               Christian Vedel 
# Purpose:              This contains functions


# Latex functions 
print_vector_as_table_row = function(x, special = ""){
  to_print = as.character(x[1])
  x = x[-1]
  for(i in x){
    to_print = paste0(to_print, " & ", i)
  }
  to_print = paste0(to_print, " \\\\")
  to_print = paste0(to_print, special)
  cat("\n", to_print)
}


# Converts degrees to km
degrees_to_km = function(return_lat, lat, long, return_both = FALSE) {
  mean_earth_radius =  6371.0088
  to_radians = 1 / 360 * 2 * pi
  
  # Lat
  earth_circumference = mean_earth_radius * 2 * pi
  lat_km = earth_circumference * lat / 360
  
  # Long
  radius_at_lat = mean_earth_radius * cos(lat * to_radians)
  circumference_at_lat = 2 * pi * radius_at_lat
  long_km = long / 360 * circumference_at_lat
  
  if(return_both){
    return(c(lat_km, long_km))
  }
  
  if(return_lat){
    return(lat_km)
  } else {
    return(long_km)
  }
}


# as_fctnum
# Converts a factor to numeric in the expected way
as_fctnum = function(x){
  x = as.character(x)
  x = as.numeric(x)
  return(x)
}


# Binscatter function
binscatter = function(
    df0, 
    n_bins = 50, 
    epsilon = 0.0000001, 
    add_lm = TRUE, 
    segment = TRUE, 
    add_np = TRUE, 
    correct_scale = TRUE
){
  require(gginnards)
  
  # Check quality
  if(any(is.na(df0$y))|any(is.na(df0$x))){
    stop("Found NAs in data")
  }
  
  breaks = quantile(df0$x, seq(0,1, by = 1/n_bins))
  
  # Small negative and positive value to include ends
  breaks[1] = breaks[1] - abs(breaks[1])*epsilon
  breaks[length(breaks)] = breaks[length(breaks)] + abs(breaks[length(breaks)])*epsilon
  
  plot_data = df0 %>% 
    mutate(
      group = cut(x, breaks = breaks)
    ) %>% 
    group_by(group) %>% 
    summarise(
      x_min = min(x),
      x_max = max(x),
      y_min = min(y),
      y_max = max(y),
      x = mean(x),
      y = mean(y)
    )
  
  if(segment){
    p1 = plot_data %>%
      ggplot(aes(x = x_min, xend = x_max, y = y, yend = y)) +
      geom_segment()
  } else {
    p1 = plot_data %>%
      ggplot(aes(x, y)) + 
      geom_point()
  }
  
  
  if(add_lm){ # lm line based on full data
    p1 = p1 + geom_smooth(aes(x,y), data = df0, method = "lm", se = FALSE, inherit.aes = FALSE)
  }
  
  if(add_np){ # np line based on full data
    p1 = p1 + geom_smooth(aes(x,y), data = df0, lty = 2, inherit.aes = FALSE)
  }
  
  # Update lims
  if(correct_scale){
    ylim = c(
      min(plot_data$y),
      max(plot_data$y)
    )
    xlim = c(
      min(plot_data$x),
      max(plot_data$x)
    )
    
    p1 = p1 + lims(
      x = xlim,
      y = ylim
    )
    
    if(add_lm){
      warning("Will override lm line from geom_smooth")
      # Removing layer
      p1 = delete_layers(p1, "GeomSmooth")
      fit = lm(y~x, data = df0)
      
      p1 = p1 + geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2], col = "blue", linewidth = 1, alpha = 0.75)
    }
    
  } 
  
  return(p1)
}

# run_interaction
# Used to get interaction effects
run_interaction = function(r, varname, binder = "_", verbose = TRUE){
  if(r == "QNA") return(NULL)
  
  formula = paste0("MB_ratio ~ IM*", varname, binder, r, " | Creamery_ID + Year + soil_type_Year + NUTS2_year")
  if(verbose) cat("\n ---> Estimating:", formula, "\n")
  formula = as.formula(formula)
  mod1 = feols(
    formula,
    data = reg_data %>% drop_na(all_of(varname))
  )
  
  # Composite effect
  var_name = paste0("IM:", varname, binder, r)
  composite_effect = coef(mod1)["IM"] + coef(mod1)[var_name]
  
  # Composite standard error
  vcov_r = vcov(mod1, cluster = ~ Creamery_ID)[c("IM", var_name), c("IM", var_name)]
  composite_se = sqrt(sum(vcov_r))
  
  res = mod1 %>% 
    coeftable() %>% 
    data.frame() %>% 
    mutate(
      composite_effect = composite_effect, 
      composite_se = composite_se
    )
  
  return(res)
}
